{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import glob\n",
    "import numpy as np\n",
    "import mahotas as mh\n",
    "from mahotas.features import surf\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import *\n",
    "from sklearn.cluster import MiniBatchKMeans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "all_instance_filenames = []\n",
    "all_instance_targets = []\n",
    "\n",
    "for f in glob.glob('cats-and-dogs-img/*.jpg'):\n",
    "    target = 1 if 'cat' in os.path.split(f)[1] else 0\n",
    "    all_instance_filenames.append(f)\n",
    "    all_instance_targets.append(target)\n",
    "\n",
    "surf_features = []\n",
    "for f in all_instance_filenames:\n",
    "    image = mh.imread(f, as_grey=True)\n",
    "    surf_features.append(surf.surf(image)[:, 5:])\n",
    "\n",
    "train_len = int(len(all_instance_filenames) * .60)\n",
    "X_train_surf_features = np.concatenate(surf_features[:train_len])\n",
    "X_test_surf_feautres = np.concatenate(surf_features[train_len:])\n",
    "y_train = all_instance_targets[:train_len]\n",
    "y_test = all_instance_targets[train_len:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.6056733 ,  2.70938102,  1.22470857, ...,  0.40240388,\n",
       "         1.36376676,  0.91444056],\n",
       "       [ 1.17256268,  2.15959095,  1.80512123, ...,  1.25544983,\n",
       "         2.14938607,  0.92937648],\n",
       "       [ 4.05884662,  1.87604644,  5.28951557, ...,  4.32944494,\n",
       "         5.41296044,  3.89081466],\n",
       "       ..., \n",
       "       [ 0.6193189 ,  2.92864247,  1.1535589 , ...,  0.36941273,\n",
       "         1.18161751,  1.09170526],\n",
       "       [ 1.68619226,  3.95702531,  0.93771461, ...,  1.37208184,\n",
       "         0.80844426,  2.08232525],\n",
       "       [ 1.09366926,  1.87174791,  1.99117652, ...,  1.12510896,\n",
       "         2.15558684,  1.0511277 ]])"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_clusters = 300\n",
    "estimator = MiniBatchKMeans(n_clusters=n_clusters)\n",
    "estimator.fit_transform(X_train_surf_features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X_train = []\n",
    "for instance in surf_features[:train_len]:\n",
    "    clusters = estimator.predict(instance)\n",
    "    features = np.bincount(clusters)\n",
    "    if len(features) < n_clusters:\n",
    "        features = np.append(features, np.zeros((1, n_clusters-len(features))))\n",
    "    X_train.append(features)\n",
    "\n",
    "X_test = []\n",
    "for instance in surf_features[train_len:]:\n",
    "    clusters = estimator.predict(instance)\n",
    "    features = np.bincount(clusters)\n",
    "    if len(features) < n_clusters:\n",
    "        features = np.append(features, np.zeros((1, n_clusters-len(features))))\n",
    "    X_test.append(features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "             precision    recall  f1-score   support\n",
      "\n",
      "          0       0.69      0.77      0.73       378\n",
      "          1       0.77      0.69      0.72       420\n",
      "\n",
      "avg / total       0.73      0.72      0.72       798\n",
      "\n"
     ]
    }
   ],
   "source": [
    "clf = LogisticRegression(C=0.001, penalty='l2')\n",
    "clf.fit(X_train, y_train)\n",
    "predictions = clf.predict(X_test)\n",
    "\n",
    "print(classification_report(y_test, predictions))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
